In [1]:
import os
## Set directory
os.chdir('/hpc/group/pbenfeylab/CheWei/CW_data/genesys')
import networkx as nx
from genesys_evaluate_v1 import *
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import warnings
# Suppress all warning messages
warnings.filterwarnings("ignore", category=DeprecationWarning)
/hpc/group/pbenfeylab/ch416/miniconda3/envs/genesys/lib/python3.8/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html from .autonotebook import tqdm as notebook_tqdm
In [2]:
## Conda Env genesys on DCC
print(torch.__version__)
print(sc.__version__)
1.11.0 1.9.6
In [3]:
## Genes considered/used (shared among samples)
gene_list = pd.read_csv('./gene_list_1108.csv')
Load Data¶
In [4]:
with open("./genesys_root_data.pkl", 'rb') as file_handle:
data = pickle.load(file_handle)
batch_size = 2000
#dataset = Root_Dataset(data['X_test'], data['y_test'])
#loader = DataLoader(dataset,
# batch_size = batch_size,
# shuffle = True, drop_last=True)
X_all = np.vstack((data['X_train'],data['X_val'],data['X_test']))
y_all = pd.concat((data['y_train'],data['y_val'],data['y_test']))
dataset = Root_Dataset(X_all, y_all)
loader = DataLoader(dataset,
batch_size = batch_size,
shuffle = True, drop_last=True)
In [5]:
classes = ['Columella', 'Lateral Root Cap', 'Phloem', 'Xylem', 'Procambium', 'Pericycle', 'Endodermis', 'Cortex', 'Atrichoblast', 'Trichoblast']
class2num = {c: i for (i, c) in enumerate(classes)}
num2class = {i: c for (i, c) in enumerate(classes)}
In [6]:
cts = ['Atrichoblast','Trichoblast','Cortex','Endodermis','Pericycle','Procambium','Xylem','Phloem','Lateral Root Cap','Columella']
ctw = np.zeros((len(cts), 17513, 17513))
## number of cells sampled from the atlas
batch_size = 2000
Extract a sample¶
In [7]:
sample = next(iter(loader))
xo = sample['x'].to(device)
y = sample['y'].to(device)
y_label = [num2class[i] for i in y.tolist()]
In [8]:
xo.shape
Out[8]:
torch.Size([2000, 11, 17513])
In [9]:
len(y_label)
Out[9]:
2000
In [10]:
len(loader)
Out[10]:
55
In [11]:
## GRN for the transition t0 to t1
for ct in cts:
print(ct)
cws = np.zeros((len(loader), 17513, 17513))
with torch.no_grad():
for i, sample in enumerate(loader):
x = sample['x'].to(device)
y = sample['y'].to(device)
y_label = [num2class[i] for i in y.tolist()]
#pred_h = model.init_hidden(batch_size)
#tfrom = model.generate_current(x, pred_h, 0).to('cpu').detach().numpy()
#cfrom = tfrom[np.where(np.array(y_label)==ct)[0],:]
cfrom = x[np.where(np.array(y_label)==ct)[0],0,:]
#pred_h = model.init_hidden(batch_size)
#tto = model.generate_next(x, pred_h, 0).to('cpu').detach().numpy()
#cto = tto[np.where(np.array(y_label)==ct)[0],:]
cto = x[np.where(np.array(y_label)==ct)[0],1,:]
cw = torch.linalg.lstsq(torch.tensor(cfrom), torch.tensor(cto)).solution.detach().numpy()
cws[i] = cw
## Calculate mean across number of repeats
cwm = np.mean(cws, axis=0)
ctw[cts.index(ct)] = cwm
Atrichoblast
/tmp/ipykernel_810401/529540224.py:21: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor). cw = torch.linalg.lstsq(torch.tensor(cfrom), torch.tensor(cto)).solution.detach().numpy()
Trichoblast Cortex Endodermis Pericycle Procambium Xylem Phloem Lateral Root Cap Columella
In [12]:
# Save the array to disk
np.save('genesys_raw_ctw_t0-t1.npy', ctw)
In [11]:
ctw = np.load('genesys_raw_ctw_t0-t1.npy')
In [12]:
## Calculate z-scores
ctw_z = np.zeros((len(cts), 17513, 17513))
for i in range(len(cts)):
if np.std(ctw[i])==0:
ctw_z[i] = np.zeros((17513, 17513))
else:
ctw_z[i] = (ctw[i] - np.mean(ctw[i])) / np.std(ctw[i])
In [13]:
## Filtering based on z-scores (with no weights)
ctw_f = np.zeros((len(cts), 17513, 17513))
## z-score threshold (keep values > mean + threshold*std)
threshold=3
for i in range(len(cts)):
ctw_f[i] = np.abs(ctw_z[i]) > threshold
Load TFs list¶
In [14]:
wanted_TFs = pd.read_csv("./Kay_TF_thalemine_annotations.csv")
In [15]:
## Make TF names unique and assign preferred names
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT2G33880"]="WOX9"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT2G45160"]="SCL27"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G04410"]="NAC78"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT3G29035"]="ORS1"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT2G02540"]="ZHD3"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT3G16500"]="IAA26"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G09740"]="HAG5"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT4G24660"]="ZHD2"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G46880"]="HDG5"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G28420"]="RLT1"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G14580"]="BLJ"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT3G45260"]="BIB"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT2G02070"]="RVN"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT2G28160"]="FIT"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G68360"]="GIS3"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G20640"]="NLP4"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G05550"]="VFP5"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT3G59470"]="FRF1"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G15150"]="HAT7"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G14750"]="WER"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G75710"]="BRON"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G74500"]="TMO7"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT2G12646"]="RITF1"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT3G48100"]="ARR5"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT4G16141"]="GATA17L"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G65640"]="NFL"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G62700"]="VND5"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT4G36160"]="VND2"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G66300"]="VND3"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G12260"]="VND4"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G62380"]="VND6"
In [16]:
pd.Series(wanted_TFs['Name']).value_counts().head(5)
Out[16]:
Name NAC001 1 PRE5 1 MYB118 1 MYB21 1 MYB0 1 Name: count, dtype: int64
Network Analysis¶
In [17]:
TFidx = []
for i in wanted_TFs['GeneID']:
if i in gene_list['features'].tolist():
TFidx.append(np.where(gene_list['features']==i)[0][0])
TFidx = np.sort(np.array(TFidx))
In [19]:
def network(i):
## No weights
adj_nw = ctw_f[i]
## Weighted
adj = ctw[i]*ctw_f[i]
## TF only
adj = adj[np.ix_(TFidx,TFidx)]
adj_nw = adj_nw[np.ix_(TFidx,TFidx)]
## Remove no connect
regidx = np.sort(np.array(pd.Series(np.where(adj_nw==True)[0]).value_counts().index[pd.Series(np.where(adj_nw==True)[0]).value_counts()>=1]))
taridx = np.sort(np.array(pd.Series(np.where(adj_nw==True)[1]).value_counts().index[pd.Series(np.where(adj_nw==True)[1]).value_counts()>=1]))
## Reciprocol
keepidx = np.sort(np.array(list(set(regidx).intersection(taridx))))
#keepidx = np.sort(np.array(list(set(regidx).union(taridx))))
TFID = np.array(gene_list['features'][TFidx])[keepidx].tolist()
## TF name to keep
TFname = []
for i in np.array(gene_list['features'][TFidx])[keepidx]:
TFname.append(wanted_TFs['Name'][np.where(wanted_TFs['GeneID']==i)[0][0]])
adj = adj[np.ix_(keepidx,keepidx)]
# Create a NetworkX graph for non-directed edges
G = nx.Graph() # supports directed edges and allows for multiple edges between the same pair of nodes
# Add nodes to the graph
num_nodes = adj.shape[0]
for i, name in enumerate(TFname):
G.add_node(i, name=name)
# Add edges to the graph with weights
for i in range(num_nodes):
for j in range(num_nodes):
weight = adj[i, j]
if weight != 0:
G.add_edge(j, i, weight=abs(weight), distance=1/abs(weight))
## Measures the extent to which how close a node is to all other nodes in the network, considering the shortest paths or geodesic distances between nodes
closeness_centrality = nx.closeness_centrality(G, distance='distance')
## Measures the extent to which a node that are not only well-connected but also connected to other well-connected nodes.
eigenvector_centrality = nx.eigenvector_centrality(G)
# Create a NetworkX graph for diected edges
G = nx.MultiDiGraph() # supports directed edges and allows for multiple edges between the same pair of nodes
# Add nodes to the graph
num_nodes = adj.shape[0]
for i, name in enumerate(TFname):
G.add_node(i, name=name)
# Add edges to the graph with weights
for i in range(num_nodes):
for j in range(num_nodes):
weight = adj[i, j]
if weight != 0:
G.add_edge(j, i, weight=weight)
## Measures the number of connections (edges) each node has
degree_centrality = nx.degree_centrality(G)
# Calculate outgoing centrality
out_centrality = nx.out_degree_centrality(G)
# Calculate incoming centrality
in_centrality = nx.in_degree_centrality(G)
## Measures the extent to which a node lies on the shortest paths between other nodes.
betweenness_centrality = nx.betweenness_centrality(G, weight='weight')
## Non_Reciprocal Out centrality
# Visualize the graph
pos = nx.spring_layout(G) # Positions of the nodes
# Node colors based on weighted betweenness centrality
node_colors = [out_centrality[node] for node in G.nodes()]
# Node sizes based on weighted betweenness centrality
node_sizes = [out_centrality[node] * 1000 for node in G.nodes()]
# Get the edge weights as a dictionary
edge_weights = nx.get_edge_attributes(G, 'weight')
edge_colors = ['red' if weight > 0 else 'blue' for (_, _, weight) in G.edges(data='weight')]
# Scale the edge weights to desired linewidths
max_weight = max(edge_weights.values())
edge_widths = [float(edge_weights[edge]) / max_weight for edge in G.edges]
# Draw the graph
nx.draw(G, pos=pos, node_color=node_colors, node_size=node_sizes, with_labels=False, width=edge_widths, edge_color=edge_colors)
# Add node labels
labels = {node: G.nodes[node]['name'] for node in G.nodes}
nx.draw_networkx_labels(G, pos=pos, labels=labels, font_size=8)
# Add a colorbar to show the weighted betweenness centrality color mapping
sm = plt.cm.ScalarMappable(cmap='viridis', norm=plt.Normalize(vmin=min(node_colors), vmax=max(node_colors)))
sm.set_array([])
plt.colorbar(sm)
# Show the plot
plt.show()
dc = pd.DataFrame.from_dict(degree_centrality, orient='index', columns=['degree_centrality'])
oc = pd.DataFrame.from_dict(out_centrality, orient='index', columns=['out_centrality'])
ic = pd.DataFrame.from_dict(in_centrality, orient='index', columns=['in_centrality'])
bc = pd.DataFrame.from_dict(betweenness_centrality, orient='index', columns=['betweenness_centrality'])
cc = pd.DataFrame.from_dict(closeness_centrality, orient='index', columns=['closeness_centrality'])
ec = pd.DataFrame.from_dict(eigenvector_centrality, orient='index', columns=['eigenvector_centrality'])
df = pd.concat([dc,oc,ic,bc,cc,ec], axis=1)
df.index =TFname
df = df.sort_values('betweenness_centrality', ascending=False)
return(df)
In [20]:
atri = network(0)
In [21]:
atri
Out[21]:
| degree_centrality | out_centrality | in_centrality | betweenness_centrality | closeness_centrality | eigenvector_centrality | |
|---|---|---|---|---|---|---|
| PLT1 | 1.072581 | 0.075269 | 0.997312 | 0.308118 | 0.000408 | 0.153906 |
| AT4G00238 | 0.618280 | 0.379032 | 0.239247 | 0.111157 | 0.000299 | 0.120422 |
| WER | 1.172043 | 0.951613 | 0.220430 | 0.097644 | 0.000407 | 0.152317 |
| BRON | 0.948925 | 0.400538 | 0.548387 | 0.094014 | 0.000348 | 0.139071 |
| HDA3 | 1.139785 | 0.844086 | 0.295699 | 0.086065 | 0.000386 | 0.148555 |
| ... | ... | ... | ... | ... | ... | ... |
| NAC082 | 0.016129 | 0.005376 | 0.010753 | 0.000000 | 0.000171 | 0.008932 |
| AT4G23860 | 0.037634 | 0.016129 | 0.021505 | 0.000000 | 0.000187 | 0.019915 |
| EMF2 | 0.034946 | 0.005376 | 0.029570 | 0.000000 | 0.000192 | 0.017595 |
| AT1G74250 | 0.005376 | 0.002688 | 0.002688 | 0.000000 | 0.000123 | 0.002797 |
| AT3G12130 | 0.013441 | 0.005376 | 0.008065 | 0.000000 | 0.000165 | 0.007783 |
373 rows × 6 columns
In [22]:
tri = network(1)
In [23]:
cor = network(2)
In [24]:
end = network(3)
In [25]:
per = network(4)
In [26]:
pro = network(5)
In [27]:
xyl = network(6)
In [28]:
phl = network(7)
In [29]:
lrc = network(8)
In [30]:
col = network(9)
In [31]:
atri.columns = ['atri_degree_centrality','atri_out_centrality','atri_in_centrality','atri_betweenness_centrality','atri_closeness_centrality','atri_eigenvector_centrality']
tri.columns = ['tri_degree_centrality','tri_out_centrality','tri_in_centrality','tri_betweenness_centrality','tri_closeness_centrality','tri_eigenvector_centrality']
cor.columns = ['cor_degree_centrality','cor_out_centrality','cor_in_centrality','cor_betweenness_centrality','cor_closeness_centrality','cor_eigenvector_centrality']
end.columns = ['end_degree_centrality','end_out_centrality','end_in_centrality','end_betweenness_centrality','end_closeness_centrality','end_eigenvector_centrality']
per.columns = ['per_degree_centrality','per_out_centrality','per_in_centrality','per_betweenness_centrality','per_closeness_centrality','per_eigenvector_centrality']
pro.columns = ['pro_degree_centrality','pro_out_centrality','pro_in_centrality','pro_betweenness_centrality','pro_closeness_centrality','pro_eigenvector_centrality']
xyl.columns = ['xyl_degree_centrality','xyl_out_centrality','xyl_in_centrality','xyl_betweenness_centrality','xyl_closeness_centrality','xyl_eigenvector_centrality']
phl.columns = ['phl_degree_centrality','phl_out_centrality','phl_in_centrality','phl_betweenness_centrality','phl_closeness_centrality','phl_eigenvector_centrality']
lrc.columns = ['lrc_degree_centrality','lrc_out_centrality','lrc_in_centrality','lrc_betweenness_centrality','lrc_closeness_centrality','lrc_eigenvector_centrality']
col.columns = ['col_degree_centrality','col_out_centrality','col_in_centrality','col_betweenness_centrality','col_closeness_centrality','col_eigenvector_centrality']
In [33]:
## Indentify main regulators in each net work
tff = []
tff = tff + atri[atri['atri_betweenness_centrality']>0].index.tolist()
tff = tff + tri[tri['tri_betweenness_centrality']>0].index.tolist()
tff = tff + lrc[lrc['lrc_betweenness_centrality']>0].index.tolist()
tff = tff + cor[cor['cor_betweenness_centrality']>0].index.tolist()
tff = tff + end[end['end_betweenness_centrality']>0].index.tolist()
tff = tff + per[per['per_betweenness_centrality']>0].index.tolist()
tff = tff + pro[pro['pro_betweenness_centrality']>0].index.tolist()
tff = tff + xyl[xyl['xyl_betweenness_centrality']>0].index.tolist()
tff = tff + phl[phl['phl_betweenness_centrality']>0].index.tolist()
tff = tff + col[col['col_betweenness_centrality']>0].index.tolist()
tf_occurance = pd.DataFrame(pd.Series(tff).value_counts())
tf_occurance = tf_occurance.rename(columns={
'count': 'tf_occurance'
})
tf_spec = pd.concat([tf_occurance, atri, tri, lrc, cor, end, per, pro, xyl, phl, col], axis=1)
tf_spec = tf_spec.fillna(0)
In [34]:
## Epidermis (atri, tri, lrc)
celltype1='atri'
celltype2='tri'
celltype3='lrc'
ts = tf_spec[tf_spec['tf_occurance']==3][[celltype1+'_betweenness_centrality', celltype2+'_betweenness_centrality', celltype3+'_betweenness_centrality', celltype1+'_out_centrality', celltype2+'_out_centrality', celltype3+'_out_centrality', celltype1+'_in_centrality', celltype2+'_in_centrality', celltype3+'_in_centrality']]
tso = (ts > 0)
ts['centrality_count'] = tso.sum(axis=1)
ts['centrality_sum'] = ts.sum(axis=1)
ts[ts['centrality_count']==9].sort_values(['centrality_count','centrality_sum'], ascending=False)
Out[34]:
| atri_betweenness_centrality | tri_betweenness_centrality | lrc_betweenness_centrality | atri_out_centrality | tri_out_centrality | lrc_out_centrality | atri_in_centrality | tri_in_centrality | lrc_in_centrality | centrality_count | centrality_sum |
|---|
In [35]:
## atri, tri
celltype1='atri'
celltype2='tri'
ts = tf_spec[tf_spec['tf_occurance']==2][[celltype1+'_betweenness_centrality', celltype2+'_betweenness_centrality', celltype1+'_out_centrality', celltype2+'_out_centrality', celltype1+'_in_centrality', celltype2+'_in_centrality']]
tso = (ts > 0)
ts['centrality_count'] = tso.sum(axis=1)
ts['centrality_sum'] = ts.sum(axis=1)
ts[ts['centrality_count']==6].sort_values(['centrality_count','centrality_sum'], ascending=False)
Out[35]:
| atri_betweenness_centrality | tri_betweenness_centrality | atri_out_centrality | tri_out_centrality | atri_in_centrality | tri_in_centrality | centrality_count | centrality_sum |
|---|
In [36]:
## Atrichoblast specific
celltype = 'atri'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[36]:
| atri_betweenness_centrality | atri_out_centrality | atri_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|
| AT5G59450 | 0.000101 | 0.051075 | 0.002688 | 3 | 3.053865 |
| GATA10 | 0.000014 | 0.002688 | 0.029570 | 3 | 3.032273 |
| AT3G01890 | 0.000007 | 0.013441 | 0.008065 | 3 | 3.021513 |
| AT5G63700 | 0.000007 | 0.008065 | 0.010753 | 3 | 3.018824 |
In [37]:
## Trichoblast specific
celltype = 'tri'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[37]:
| tri_betweenness_centrality | tri_out_centrality | tri_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|
| ESE3 | 0.00006 | 0.028302 | 0.003145 | 3 | 3.031506 |
| GBF1 | 0.00001 | 0.003145 | 0.003145 | 3 | 3.006299 |
In [38]:
## LRC specific
celltype = 'lrc'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[38]:
| lrc_betweenness_centrality | lrc_out_centrality | lrc_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|
| SCL14 | 0.000137 | 0.039370 | 0.011024 | 3 | 3.050530 |
| MBF1B | 0.000119 | 0.011024 | 0.004724 | 3 | 3.015867 |
| ZML2 | 0.000079 | 0.004724 | 0.003150 | 3 | 3.007954 |
| AT4G37180 | 0.000077 | 0.014173 | 0.001575 | 3 | 3.015825 |
| SUVH5 | 0.000060 | 0.023622 | 0.006299 | 3 | 3.029981 |
| AT3G43240 | 0.000057 | 0.009449 | 0.004724 | 3 | 3.014230 |
| OFP15 | 0.000042 | 0.177953 | 0.001575 | 3 | 3.179570 |
| AT5G57150 | 0.000030 | 0.007874 | 0.001575 | 3 | 3.009479 |
| SNL4 | 0.000025 | 0.011024 | 0.001575 | 3 | 3.012623 |
| SDG37 | 0.000025 | 0.045669 | 0.004724 | 3 | 3.050419 |
| HSI2 | 0.000025 | 0.037795 | 0.006299 | 3 | 3.044119 |
| AT1G80400 | 0.000020 | 0.011024 | 0.003150 | 3 | 3.014193 |
| AT1G68670 | 0.000017 | 0.050394 | 0.007874 | 3 | 3.058285 |
| DPA | 0.000017 | 0.012598 | 0.003150 | 3 | 3.015765 |
| AT4G16150 | 0.000012 | 0.009449 | 0.014173 | 3 | 3.023634 |
| AT1G77250 | 0.000012 | 0.072441 | 0.001575 | 3 | 3.074028 |
| EIL3 | 0.000010 | 0.009449 | 0.022047 | 3 | 3.031506 |
| AT3G24120 | 0.000010 | 0.003150 | 0.014173 | 3 | 3.017333 |
| AT5G58280 | 0.000010 | 0.031496 | 0.007874 | 3 | 3.039380 |
| AT1G58110 | 0.000007 | 0.003150 | 0.014173 | 3 | 3.017330 |
| TAFII21 | 0.000007 | 0.026772 | 0.020472 | 3 | 3.047252 |
| NPR1 | 0.000005 | 0.020472 | 0.006299 | 3 | 3.026777 |
| HMGB2 | 0.000005 | 0.048819 | 0.001575 | 3 | 3.050399 |
| AT1G55750 | 0.000005 | 0.011024 | 0.001575 | 3 | 3.012603 |
| PTM | 0.000005 | 0.015748 | 0.007874 | 3 | 3.023627 |
| AT3G49800 | 0.000002 | 0.003150 | 0.003150 | 3 | 3.006302 |
In [39]:
## Columella specific
celltype = 'col'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[39]:
| col_betweenness_centrality | col_out_centrality | col_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|
| FES1 | 0.085590 | 0.011480 | 0.024235 | 3 | 3.121304 |
| LBD15 | 0.043631 | 0.385204 | 0.002551 | 3 | 3.431386 |
| NAC063 | 0.023060 | 0.033163 | 0.028061 | 3 | 3.084285 |
| AT3G60670 | 0.000764 | 0.501276 | 0.070153 | 3 | 3.572193 |
| AT2G19260 | 0.000083 | 0.017857 | 0.002551 | 3 | 3.020491 |
| TLP7 | 0.000037 | 0.133929 | 0.014031 | 3 | 3.147997 |
| ATRX | 0.000008 | 0.058673 | 0.008929 | 3 | 3.067610 |
| AT1G36060 | 0.000007 | 0.082908 | 0.002551 | 3 | 3.085466 |
| AT1G04390 | 0.000007 | 0.017857 | 0.007653 | 3 | 3.025517 |
| SIGE | 0.000005 | 0.040816 | 0.026786 | 3 | 3.067607 |
| AGL102 | 0.000005 | 0.086735 | 0.001276 | 3 | 3.088015 |
| BHLH101 | 0.000003 | 0.043367 | 0.001276 | 3 | 3.044646 |
| TLP10 | 0.000003 | 0.043367 | 0.007653 | 3 | 3.051024 |
| SIG2 | 0.000002 | 0.042092 | 0.002551 | 3 | 3.044644 |
| ARR9 | 0.000002 | 0.024235 | 0.003827 | 3 | 3.028063 |
| AT1G74840 | 0.000002 | 0.019133 | 0.003827 | 3 | 3.022961 |
| SWI3C | 0.000002 | 0.016582 | 0.001276 | 3 | 3.017859 |
In [40]:
## Ground tissue
celltype1='cor'
celltype2='end'
ts = tf_spec[tf_spec['tf_occurance']==2][[celltype1+'_betweenness_centrality', celltype2+'_betweenness_centrality', celltype1+'_out_centrality', celltype2+'_out_centrality', celltype1+'_in_centrality', celltype2+'_in_centrality']]
tso = (ts > 0)
ts['centrality_count'] = tso.sum(axis=1)
ts['centrality_sum'] = ts.sum(axis=1)
ts[ts['centrality_count']==6].sort_values(['centrality_count','centrality_sum'], ascending=False)
Out[40]:
| cor_betweenness_centrality | end_betweenness_centrality | cor_out_centrality | end_out_centrality | cor_in_centrality | end_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|---|---|---|
| HMGB6 | 0.005102 | 0.000188 | 0.311774 | 0.349036 | 0.004975 | 0.002141 | 6 | 6.673218 |
| AT5G23405 | 0.000022 | 0.000096 | 0.069652 | 0.036403 | 0.003317 | 0.004283 | 6 | 6.113772 |
| LSMT-L | 0.000033 | 0.000009 | 0.021559 | 0.010707 | 0.054726 | 0.012848 | 6 | 6.099882 |
In [41]:
## Cortex specific
celltype = 'cor'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[41]:
| cor_betweenness_centrality | cor_out_centrality | cor_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|
| FRS12 | 0.006854 | 0.009950 | 0.009950 | 3 | 3.026754 |
| HSFB4 | 0.004162 | 0.334992 | 0.001658 | 3 | 3.340813 |
| pde191 | 0.000515 | 0.162521 | 0.003317 | 3 | 3.166353 |
| AT2G42660 | 0.000430 | 0.034826 | 0.102819 | 3 | 3.138075 |
| SCL3 | 0.000284 | 0.026534 | 0.003317 | 3 | 3.030134 |
| NF-YB12 | 0.000143 | 0.016584 | 0.004975 | 3 | 3.021702 |
| SVP | 0.000072 | 0.039801 | 0.003317 | 3 | 3.043189 |
| GL3 | 0.000039 | 0.013267 | 0.013267 | 3 | 3.026573 |
| AT4G01280 | 0.000033 | 0.003317 | 0.009950 | 3 | 3.013300 |
| LOL2 | 0.000019 | 0.058043 | 0.003317 | 3 | 3.061379 |
| SCR | 0.000017 | 0.053068 | 0.003317 | 3 | 3.056401 |
| AT3G27700 | 0.000017 | 0.013267 | 0.004975 | 3 | 3.018259 |
| TGA7 | 0.000014 | 0.001658 | 0.067993 | 3 | 3.069666 |
| AT1G67310 | 0.000011 | 0.001658 | 0.001658 | 3 | 3.003328 |
| AT5G23930 | 0.000008 | 0.051410 | 0.003317 | 3 | 3.054735 |
| WRKY50 | 0.000006 | 0.013267 | 0.006633 | 3 | 3.019906 |
| AT5G12400 | 0.000003 | 0.006633 | 0.003317 | 3 | 3.009953 |
| CIB5 | 0.000003 | 0.023217 | 0.009950 | 3 | 3.033170 |
| OFP13 | 0.000003 | 0.003317 | 0.028192 | 3 | 3.031512 |
| BSM | 0.000003 | 0.028192 | 0.003317 | 3 | 3.031512 |
In [42]:
## Endodermis specific
celltype = 'end'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[42]:
| end_betweenness_centrality | end_out_centrality | end_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|
| AT1G62010 | 0.000542 | 0.004283 | 0.004283 | 3 | 3.009108 |
| JKD | 0.000184 | 0.083512 | 0.002141 | 3 | 3.085837 |
| AT4G22360 | 0.000124 | 0.002141 | 0.004283 | 3 | 3.006548 |
| GRF4 | 0.000060 | 0.265525 | 0.002141 | 3 | 3.267726 |
| AT1G62150 | 0.000032 | 0.053533 | 0.002141 | 3 | 3.055707 |
| AT3G05760 | 0.000009 | 0.025696 | 0.002141 | 3 | 3.027846 |
| HSF1 | 0.000009 | 0.004283 | 0.002141 | 3 | 3.006433 |
In [43]:
## Stele
celltype1='per'
celltype2='pro'
celltype3='xyl'
celltype4='phl'
ts = tf_spec[tf_spec['tf_occurance']==4][[celltype1+'_betweenness_centrality', celltype2+'_betweenness_centrality', celltype3+'_betweenness_centrality', celltype4+'_betweenness_centrality', celltype1+'_out_centrality', celltype2+'_out_centrality', celltype3+'_out_centrality', celltype4+'_out_centrality', celltype1+'_in_centrality', celltype2+'_in_centrality', celltype3+'_in_centrality', celltype4+'_in_centrality']]
tso = (ts > 0)
ts['centrality_count'] = tso.sum(axis=1)
ts['centrality_sum'] = ts.sum(axis=1)
ts[ts['centrality_count']==12].sort_values(['centrality_count','centrality_sum'], ascending=False)
Out[43]:
| per_betweenness_centrality | pro_betweenness_centrality | xyl_betweenness_centrality | phl_betweenness_centrality | per_out_centrality | pro_out_centrality | xyl_out_centrality | phl_out_centrality | per_in_centrality | pro_in_centrality | xyl_in_centrality | phl_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| PLIM2b | 0.000331 | 0.000002 | 0.000431 | 0.000271 | 0.042553 | 0.010 | 0.1875 | 0.249595 | 0.006383 | 0.00375 | 0.009868 | 0.011345 | 12 | 12.522029 |
| AT5G09460 | 0.000005 | 0.000047 | 0.000431 | 0.000416 | 0.008511 | 0.035 | 0.1250 | 0.084279 | 0.004255 | 0.01750 | 0.018092 | 0.008104 | 12 | 12.301639 |
In [44]:
## Pericycle
celltype = 'per'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[44]:
| per_betweenness_centrality | per_out_centrality | per_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|
| U2AF35B | 0.000009 | 0.006383 | 0.004255 | 3 | 3.010647 |
| AT3G19360 | 0.000005 | 0.004255 | 0.002128 | 3 | 3.006388 |
In [45]:
## Procambium
celltype = 'pro'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[45]:
| pro_betweenness_centrality | pro_out_centrality | pro_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|
| AT1G43770 | 0.021530 | 0.08375 | 0.01125 | 3 | 3.116530 |
| IAA32 | 0.019025 | 0.01500 | 0.08875 | 3 | 3.122775 |
| AT3G21810 | 0.003324 | 0.02750 | 0.00500 | 3 | 3.035824 |
| AGL42 | 0.003204 | 0.00500 | 0.01750 | 3 | 3.025704 |
| MYB88 | 0.001494 | 0.44375 | 0.00250 | 3 | 3.447744 |
| AT2G24680 | 0.000341 | 0.06875 | 0.00125 | 3 | 3.070341 |
| HAC4 | 0.000217 | 0.00250 | 0.01375 | 3 | 3.016467 |
| RAV1 | 0.000127 | 0.30875 | 0.00125 | 3 | 3.310127 |
| CHR24 | 0.000119 | 0.07625 | 0.00125 | 3 | 3.077619 |
| MYBR1 | 0.000077 | 0.17250 | 0.00125 | 3 | 3.173827 |
| AT2G46735 | 0.000047 | 0.00750 | 0.01125 | 3 | 3.018797 |
| HK3 | 0.000045 | 0.07750 | 0.00750 | 3 | 3.085045 |
| AT3G08505 | 0.000042 | 0.02625 | 0.03750 | 3 | 3.063792 |
| MYB3R-3 | 0.000020 | 0.03000 | 0.00250 | 3 | 3.032520 |
| DEAR3 | 0.000020 | 0.03750 | 0.00625 | 3 | 3.043770 |
| WRKY70 | 0.000016 | 0.07875 | 0.00125 | 3 | 3.080016 |
| IAA10 | 0.000016 | 0.06500 | 0.00250 | 3 | 3.067516 |
| AT3G51120 | 0.000016 | 0.02000 | 0.00875 | 3 | 3.028766 |
| AT5G58620 | 0.000013 | 0.03625 | 0.00125 | 3 | 3.037513 |
| GATA26 | 0.000011 | 0.03750 | 0.00375 | 3 | 3.041261 |
| AIP2 | 0.000011 | 0.04875 | 0.00500 | 3 | 3.053761 |
| LAS | 0.000008 | 0.00250 | 0.03750 | 3 | 3.040008 |
| AT1G05920 | 0.000006 | 0.00750 | 0.00875 | 3 | 3.016256 |
| ARIA | 0.000006 | 0.01000 | 0.00875 | 3 | 3.018756 |
| TOC1 | 0.000006 | 0.03625 | 0.00375 | 3 | 3.040006 |
| AT3G47120 | 0.000006 | 0.02250 | 0.01125 | 3 | 3.033756 |
| JMJ18 | 0.000005 | 0.01500 | 0.00250 | 3 | 3.017505 |
| bZIP52 | 0.000005 | 0.00750 | 0.00250 | 3 | 3.010005 |
| CSP3 | 0.000003 | 0.00875 | 0.00750 | 3 | 3.016253 |
| MYC4 | 0.000003 | 0.00125 | 0.01750 | 3 | 3.018753 |
| HAC1 | 0.000003 | 0.02250 | 0.00500 | 3 | 3.027503 |
| RHA1 | 0.000003 | 0.02750 | 0.00375 | 3 | 3.031253 |
| AT1G63490 | 0.000003 | 0.03125 | 0.01000 | 3 | 3.041253 |
| AT2G27580 | 0.000003 | 0.03250 | 0.00125 | 3 | 3.033753 |
| SHL1 | 0.000003 | 0.01375 | 0.00625 | 3 | 3.020003 |
| CHR5 | 0.000003 | 0.01875 | 0.01125 | 3 | 3.030003 |
| BBX19 | 0.000002 | 0.02000 | 0.00875 | 3 | 3.028752 |
| EMB3114 | 0.000002 | 0.00750 | 0.00125 | 3 | 3.008752 |
| AT3G13040 | 0.000002 | 0.03125 | 0.00500 | 3 | 3.036252 |
| FLD | 0.000002 | 0.01625 | 0.01000 | 3 | 3.026252 |
| NPH4 | 0.000002 | 0.01375 | 0.00500 | 3 | 3.018752 |
| AT5G40710 | 0.000002 | 0.01375 | 0.00250 | 3 | 3.016252 |
| NLP7 | 0.000002 | 0.01250 | 0.00625 | 3 | 3.018752 |
| AT1G74120 | 0.000002 | 0.02000 | 0.00500 | 3 | 3.025002 |
| BZO2H1 | 0.000002 | 0.01625 | 0.00250 | 3 | 3.018752 |
| POB1 | 0.000002 | 0.01000 | 0.00125 | 3 | 3.011252 |
| AT2G23780 | 0.000002 | 0.01125 | 0.00125 | 3 | 3.012502 |
| ABF2 | 0.000002 | 0.02250 | 0.00250 | 3 | 3.025002 |
In [46]:
## Xylem
celltype = 'xyl'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[46]:
| xyl_betweenness_centrality | xyl_out_centrality | xyl_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|
| LBD31 | 0.006023 | 0.082237 | 0.001645 | 3 | 3.089905 |
| TCP20 | 0.000713 | 0.251645 | 0.004934 | 3 | 3.257292 |
| AT3G03590 | 0.000290 | 0.090461 | 0.003289 | 3 | 3.094040 |
| VND6 | 0.000068 | 0.006579 | 0.018092 | 3 | 3.024739 |
| DOF1 | 0.000051 | 0.264803 | 0.001645 | 3 | 3.266499 |
| AT5G63260 | 0.000024 | 0.004934 | 0.014803 | 3 | 3.019761 |
| LBD23 | 0.000022 | 0.008224 | 0.003289 | 3 | 3.011535 |
| AT5G47390 | 0.000019 | 0.024671 | 0.008224 | 3 | 3.032914 |
| HAG5 | 0.000011 | 0.024671 | 0.003289 | 3 | 3.027971 |
| AT4G12850 | 0.000005 | 0.014803 | 0.008224 | 3 | 3.023032 |
| ZML1 | 0.000003 | 0.003289 | 0.006579 | 3 | 3.009871 |
| AT1G19490 | 0.000003 | 0.001645 | 0.003289 | 3 | 3.004937 |
| bHLH121 | 0.000003 | 0.001645 | 0.019737 | 3 | 3.021384 |
| FBH4 | 0.000003 | 0.003289 | 0.004934 | 3 | 3.008226 |
| AT2G37520 | 0.000003 | 0.001645 | 0.001645 | 3 | 3.003292 |
| AT-HSFA5 | 0.000003 | 0.003289 | 0.021382 | 3 | 3.024674 |
In [47]:
## Phloem
celltype = 'phl'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[47]:
| phl_betweenness_centrality | phl_out_centrality | phl_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|
| bZIP19 | 0.000258 | 0.085900 | 0.012966 | 3 | 3.099123 |
| AGL80 | 0.000158 | 0.038898 | 0.021070 | 3 | 3.060125 |
| ARF17 | 0.000076 | 0.009724 | 0.024311 | 3 | 3.034112 |
| AT3G51950 | 0.000074 | 0.092382 | 0.003241 | 3 | 3.095698 |
| AT5G16470 | 0.000013 | 0.019449 | 0.003241 | 3 | 3.022704 |
| NF-YC3 | 0.000005 | 0.058347 | 0.006483 | 3 | 3.064835 |
| AT5G65130 | 0.000003 | 0.001621 | 0.006483 | 3 | 3.008106 |
| bZIP68 | 0.000003 | 0.001621 | 0.011345 | 3 | 3.012969 |
| AT3G16940 | 0.000003 | 0.004862 | 0.003241 | 3 | 3.008106 |
| GT-1 | 0.000003 | 0.004862 | 0.003241 | 3 | 3.008106 |
In [48]:
tf_spec.to_csv('scRNAseq_TF_GRN_centrality_t0-t1_zscore3.csv', index=True)
In [49]:
tf_spec
Out[49]:
| tf_occurance | atri_degree_centrality | atri_out_centrality | atri_in_centrality | atri_betweenness_centrality | atri_closeness_centrality | atri_eigenvector_centrality | tri_degree_centrality | tri_out_centrality | tri_in_centrality | ... | phl_in_centrality | phl_betweenness_centrality | phl_closeness_centrality | phl_eigenvector_centrality | col_degree_centrality | col_out_centrality | col_in_centrality | col_betweenness_centrality | col_closeness_centrality | col_eigenvector_centrality | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| BBX29 | 10.0 | 0.247312 | 0.053763 | 0.193548 | 0.002442 | 0.000278 | 0.076342 | 0.110063 | 0.006289 | 0.103774 | ... | 0.089141 | 0.036019 | 0.000107 | 0.120473 | 0.420918 | 0.140306 | 0.280612 | 0.000464 | 0.000211 | 0.068903 |
| ATWHY2 | 10.0 | 0.325269 | 0.290323 | 0.034946 | 0.000877 | 0.000313 | 0.087843 | 0.103774 | 0.081761 | 0.022013 | ... | 0.030794 | 0.000276 | 0.000081 | 0.040414 | 0.127551 | 0.075255 | 0.052296 | 0.003810 | 0.000172 | 0.034190 |
| HAT1 | 10.0 | 0.279570 | 0.112903 | 0.166667 | 0.009238 | 0.000282 | 0.079547 | 0.078616 | 0.003145 | 0.075472 | ... | 0.129660 | 0.018804 | 0.000109 | 0.138462 | 0.355867 | 0.116071 | 0.239796 | 0.000269 | 0.000210 | 0.065037 |
| HMGB5 | 10.0 | 0.720430 | 0.521505 | 0.198925 | 0.065806 | 0.000297 | 0.130504 | 0.345912 | 0.248428 | 0.097484 | ... | 0.116694 | 0.000329 | 0.000105 | 0.067510 | 0.431122 | 0.156888 | 0.274235 | 0.000350 | 0.000214 | 0.072473 |
| AT4G25210 | 10.0 | 0.134409 | 0.037634 | 0.096774 | 0.000196 | 0.000261 | 0.051972 | 0.044025 | 0.009434 | 0.034591 | ... | 0.048622 | 0.000318 | 0.000087 | 0.053194 | 0.144133 | 0.025510 | 0.118622 | 0.000007 | 0.000185 | 0.038891 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| AT3G07500 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000056 | 0.005611 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| AT2G04845 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000064 | 0.004459 | 0.005102 | 0.003827 | 0.001276 | 0.000000 | 0.000074 | 0.001810 |
| RAP2.7 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000090 | 0.035443 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| DAR7 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.030612 | 0.028061 | 0.002551 | 0.000000 | 0.000102 | 0.012089 |
| KAN | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.005102 | 0.002551 | 0.002551 | 0.000000 | 0.000081 | 0.001867 |
966 rows × 61 columns
In [ ]: